Yao proposed an AB model that I think is equivalent to noisy percentile. Andrew Hearin proposed noisy percentile as an AB model to me during the KITP, but there was a reason that I ended up rejecting it as a possiblity, and he agreed. However, I deleted my notes on the topic, so I need to redo it to see why I ended up rejecting it.


In [35]:
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [36]:
from pearce.mocks import cat_dict
import numpy as np

In [37]:
from halotools.empirical_models import noisy_percentile
from halotools.utils.table_utils import compute_prim_haloprop_bins

In [38]:
cosmo_params = {'simname':'chinchilla', 'Lbox':400.0, 'scale_factors':[1.0]}
cat = cat_dict[cosmo_params['simname']](**cosmo_params)#construct the specified catalog!

In [39]:
cat.load(1.0, HOD='hsabRedMagic')

In [40]:
fiducial_point = {'logM0': 12.0, 'logM1': 13.2, 'alpha': 1.02,
                      'logMmin': 12.0, 'f_c':1.0, 'sigma_logM': 0.60}

In [41]:
fiducial_point['mean_occupation_centrals_assembias_param1'] = 1.0
fiducial_point['mean_occupation_satellites_assembias_param1'] = 1.0

In [42]:
hod_params = dict(fiducial_point)
cat.populate(hod_params)

In [43]:
cen_occ = cat.model.model_dictionary['centrals_occupation']

In [44]:
prim_haloprop = cat.halocat.halo_table[cen_occ.prim_haloprop_key]
sec_haloprop = cat.halocat.halo_table[cen_occ.sec_haloprop_key]

In [45]:
prim_haloprop_bins = compute_prim_haloprop_bins(prim_haloprop = prim_haloprop)

In [46]:
baseline_result = cen_occ.baseline_mean_occupation(prim_haloprop = prim_haloprop)
split = cen_occ.percentile_splitting_function(prim_haloprop = prim_haloprop)
no_edge_mask = ((split > 0) & (split < 1)) perturbation = cen_occ._galprop_perturbation( baseline_result = baseline_result[no_edge_mask], prim_haloprop = prim_haloprop[no_edge_mask], sec_haloprop = sec_haloprop[no_edge_mask], splitting_result = split[no_edge_mask]) final_result = baseline_result[:] final_result[no_edge_mask]+=perturbation

In [47]:
final_result = cen_occ.mean_occupation(prim_haloprop = prim_haloprop, sec_haloprop = sec_haloprop)

In [48]:
bin_slice = prim_haloprop_bins%5 == 0

In [49]:
pal = sns.cubehelix_palette(len(set(prim_haloprop_bins[bin_slice])), rot=-.5, dark=.3)

In [50]:
random_idxs = np.random.choice(prim_haloprop_bins[bin_slice].shape[0], 10000, replace=False)

In [51]:
fig = plt.figure(figsize = (12.5,10))
#plt.plot(prim_haloprop_bins[bin_slice][sorted_idxs], baseline_result[bin_slice][sorted_idxs])
ax = sns.boxplot(x = prim_haloprop_bins[bin_slice], y = final_result[bin_slice], palette=pal)
sns.stripplot(x = prim_haloprop_bins[bin_slice][random_idxs], y = final_result[bin_slice][random_idxs],
              jitter=True, size=4, color=".3", linewidth=0)
sns.plt.title('Decorated HOD Boxplot Central Occupation')
sns.plt.xlabel('Halo Mass Bin #')
sns.plt.ylabel('Halo Occupation')


Out[51]:
<matplotlib.text.Text at 0x7fd27424a310>

In [52]:
from halotools.utils import rank_order_percentile

In [57]:
A = 0.0
final_result_percentile = np.zeros_like(prim_haloprop) for mass_bin in np.unique(prim_haloprop_bins): idx_in_bin = np.where(mass_bin == prim_haloprop_bins) occ_in_bin = baseline_result[idx_in_bin] sec_haloprop_in_bin = sec_haloprop[idx_in_bin] sh_perc = rank_order_percentile(sec_haloprop_in_bin) final_result_percentile[idx_in_bin] = noisy_percentile(sh_perc, correlation_coeff= A)

In [58]:
baseline_upper_bound = 1.0
baseline_lower_bound = 0.0
def percentile_perturbation(sec_haloprop, baseline_result, splitting_result):
    perturbation = np.zeros(len(sec_haloprop))
    sh_perc = rank_order_percentile(sec_haloprop)

    strength = noisy_percentile(sh_perc, correlation_coeff = A) -0.5
    positive_strength_idx = strength > 0
    negative_strength_idx = strength < 0

    if len(baseline_result[positive_strength_idx]) > 0:
        base_pos = baseline_result[positive_strength_idx]
        split_pos = splitting_result[positive_strength_idx]
        type1_frac_pos = 1 - split_pos
        strength_pos = strength[positive_strength_idx]

        upper_bound1 = baseline_upper_bound - base_pos
        upper_bound2 = ((1 - type1_frac_pos)/type1_frac_pos)*(base_pos - baseline_lower_bound)
        upper_bound = np.minimum(upper_bound1, upper_bound2)
        perturbation[positive_strength_idx] = strength_pos*upper_bound

    if len(baseline_result[negative_strength_idx]) > 0:
        base_neg = baseline_result[negative_strength_idx]
        split_neg = splitting_result[negative_strength_idx]
        type1_frac_neg = 1 - split_neg
        strength_neg = strength[negative_strength_idx]

        lower_bound1 = baseline_lower_bound - base_neg
        lower_bound2 = (1 - type1_frac_neg)/type1_frac_neg*(base_neg - baseline_upper_bound)
        lower_bound = np.maximum(lower_bound1, lower_bound2)
        perturbation[negative_strength_idx] = np.abs(strength_neg)*lower_bound

    return perturbation

In [59]:
final_result_percentile = np.zeros_like(prim_haloprop)
for mass_bin in np.unique(prim_haloprop_bins):
    idx_in_bin = np.where(mass_bin == prim_haloprop_bins)
    occ_in_bin = baseline_result[idx_in_bin]
    sec_haloprop_in_bin = sec_haloprop[idx_in_bin]
    final_result_percentile[idx_in_bin] = occ_in_bin+ percentile_perturbation(sec_haloprop_in_bin, occ_in_bin, split[idx_in_bin])

In [60]:
fig = plt.figure(figsize = (12.5,10))
#plt.plot(prim_haloprop_bins[bin_slice][sorted_idxs], baseline_result[bin_slice][sorted_idxs])
ax = sns.boxplot(x = prim_haloprop_bins[bin_slice], y = final_result_percentile[bin_slice], palette=pal)
sns.stripplot(x = prim_haloprop_bins[bin_slice][random_idxs], y = final_result_percentile[bin_slice][random_idxs],
              jitter=True, size=4, color=".3", linewidth=0)
sns.plt.title('Noisy Percentile HOD Boxplot Central Occupation')
sns.plt.xlabel('Halo Mass Bin #')
sns.plt.ylabel('Halo Occupation')


Out[60]:
<matplotlib.text.Text at 0x7fd271370990>

In [68]:
from scipy.stats import poisson, rankdata


def get_ranks(data):
    rank = rankdata(data, 'ordinal') - 0.5
    rank /= len(data)
    return rank


def shuffle_ranks(original_ranks, correlation):    
    if correlation >= 1.0:
        return original_ranks
    if correlation <= -1.0:
        return 1.0 - original_ranks
    
    positive_correlation = (correlation > 0)
    correlation_magnitude = abs(correlation)
    
    if correlation_magnitude < 1.0e-8:
        return get_ranks(np.random.rand(*original_ranks.shape))
    
    x = (1.0-correlation_magnitude)/correlation_magnitude    
    scale = (x**0.37 * (1.0+x**0.6) * 0.25)
    ranks = np.random.normal(loc=(original_ranks if positive_correlation else (-original_ranks)), scale=scale)
    
    return get_ranks(ranks)


def get_cen_count(rank_conc, mean_n_cen, A_cen):
    new_rank = shuffle_ranks(rank_conc, A_cen)
    return (new_rank >= mean_n_cen).astype(np.int)

In [77]:
final_result_yao = np.zeros_like(prim_haloprop)
for mass_bin in np.unique(prim_haloprop_bins):
    idx_in_bin = np.where(mass_bin == prim_haloprop_bins)
    occ_in_bin = baseline_result[idx_in_bin]
    sec_haloprop_in_bin = sec_haloprop[idx_in_bin]
    sh_perc = rank_order_percentile(sec_haloprop_in_bin)

    final_result_yao[idx_in_bin] = get_cen_count(sh_perc, occ_in_bin, A_cen= 0.0)

In [78]:
np.unique(final_result_yao)


Out[78]:
<Column name='halo_mvir' dtype='float32' length=2>
0.0
1.0

In [76]:
np.unique(final_result_percentile)


Out[76]:
<Column name='halo_mvir' dtype='float32' length=16940449>
0.0
1.49012e-08
1.49013e-08
1.49018e-08
1.49018e-08
1.49019e-08
1.49021e-08
1.49021e-08
1.49022e-08
1.49025e-08
1.49027e-08
1.49027e-08
...
0.999999
0.999999
0.999999
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0

In [74]:
fig = plt.figure(figsize = (12.5,10))
#plt.plot(prim_haloprop_bins[bin_slice][sorted_idxs], baseline_result[bin_slice][sorted_idxs])
ax = sns.boxplot(x = prim_haloprop_bins[bin_slice], y = final_result_yao[bin_slice], palette=pal)
sns.stripplot(x = prim_haloprop_bins[bin_slice][random_idxs], y = final_result_yao[bin_slice][random_idxs],
              jitter=True, size=4, color=".3", linewidth=0)
sns.plt.title('Yao Percentile HOD Boxplot Central Occupation')
sns.plt.xlabel('Halo Mass Bin #')
sns.plt.ylabel('Halo Occupation')


Out[74]:
<matplotlib.text.Text at 0x7fd270638e90>

In [ ]: